{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# More JOIN operations\n",
    "\n",
    "![rel](https://sqlzoo.net/w/images/1/10/Movie-er.png)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Setting default log level to \"WARN\".\n",
      "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n"
     ]
    }
   ],
   "source": [
    "import findspark\n",
    "import pandas as pd\n",
    "findspark.init()\n",
    "\n",
    "SVR = '192.168.31.31'\n",
    "from pyspark.sql import SparkSession\n",
    "\n",
    "sc = (SparkSession.builder.appName('app07') \n",
    "      .master(f'spark://{SVR}:7077') \n",
    "      .config('spark.sql.warehouse.dir', f'hdfs://{SVR}:9000/user/hive/warehouse') \n",
    "      .config('spark.cores.max', '4') \n",
    "      .config('spark.executor.instances', '1') \n",
    "      .config('spark.executor.cores', '2') \n",
    "      .config('spark.executor.memory', '10g') \n",
    "      .enableHiveSupport().getOrCreate())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. 1962 movies\n",
    "\n",
    "List the films where the **yr** is 1962 [Show **id, title**]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "movie = sc.read.table('sqlzoo.movie')\n",
    "actor = sc.read.table('sqlzoo.actor')\n",
    "casting = sc.read.table('sqlzoo.casting')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                \r"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10212</td>\n",
       "      <td>A Kind of Loving</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10329</td>\n",
       "      <td>A Symposium on Popular Songs</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10347</td>\n",
       "      <td>A Very Private Affair (Vie PrivÃ©e)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10648</td>\n",
       "      <td>An Autumn Afternoon</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10868</td>\n",
       "      <td>Atraco a las tres</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>81</th>\n",
       "      <td>21324</td>\n",
       "      <td>Two Half Times in Hell</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>82</th>\n",
       "      <td>21462</td>\n",
       "      <td>Varan the Unbelievable</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>83</th>\n",
       "      <td>21494</td>\n",
       "      <td>Village of Daughters</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>84</th>\n",
       "      <td>21673</td>\n",
       "      <td>What Ever Happened to Baby Jane?</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>85</th>\n",
       "      <td>21761</td>\n",
       "      <td>Who's Got the Action?</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>86 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       id                                title\n",
       "0   10212                     A Kind of Loving\n",
       "1   10329         A Symposium on Popular Songs\n",
       "2   10347  A Very Private Affair (Vie PrivÃ©e)\n",
       "3   10648                  An Autumn Afternoon\n",
       "4   10868                    Atraco a las tres\n",
       "..    ...                                  ...\n",
       "81  21324               Two Half Times in Hell\n",
       "82  21462               Varan the Unbelievable\n",
       "83  21494                 Village of Daughters\n",
       "84  21673     What Ever Happened to Baby Jane?\n",
       "85  21761                Who's Got the Action?\n",
       "\n",
       "[86 rows x 2 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movie.filter(movie['yr']==1962).select('id', 'title').toPandas()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. When was Citizen Kane released?\n",
    "\n",
    "Give year of 'Citizen Kane'."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>yr</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1941</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     yr\n",
       "0  1941"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movie.filter(movie['title']=='Citizen Kane').select('yr').toPandas()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Star Trek movies\n",
    "\n",
    "List all of the Star Trek movies, include the **id**, **title** and **yr** (all of these movies include the words Star Trek in the title). Order results by year."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>title</th>\n",
       "      <th>yr</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>17770</td>\n",
       "      <td>Star Trek: First Contact</td>\n",
       "      <td>1996</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>17771</td>\n",
       "      <td>Star Trek: Insurrection</td>\n",
       "      <td>1998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>17772</td>\n",
       "      <td>Star Trek: The Motion Picture</td>\n",
       "      <td>1979</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>17773</td>\n",
       "      <td>Star Trek</td>\n",
       "      <td>2009</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>17774</td>\n",
       "      <td>Star Trek Generations</td>\n",
       "      <td>1994</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>17775</td>\n",
       "      <td>Star Trek II: The Wrath of Khan</td>\n",
       "      <td>1982</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>17776</td>\n",
       "      <td>Star Trek III: The Search for Spock</td>\n",
       "      <td>1984</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>17777</td>\n",
       "      <td>Star Trek IV: The Voyage Home</td>\n",
       "      <td>1986</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>17778</td>\n",
       "      <td>Star Trek Nemesis</td>\n",
       "      <td>2002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>17779</td>\n",
       "      <td>Star Trek V: The Final Frontier</td>\n",
       "      <td>1989</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>17780</td>\n",
       "      <td>Star Trek VI: The Undiscovered Country</td>\n",
       "      <td>1991</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       id                                   title    yr\n",
       "0   17770                Star Trek: First Contact  1996\n",
       "1   17771                 Star Trek: Insurrection  1998\n",
       "2   17772           Star Trek: The Motion Picture  1979\n",
       "3   17773                               Star Trek  2009\n",
       "4   17774                   Star Trek Generations  1994\n",
       "5   17775         Star Trek II: The Wrath of Khan  1982\n",
       "6   17776     Star Trek III: The Search for Spock  1984\n",
       "7   17777           Star Trek IV: The Voyage Home  1986\n",
       "8   17778                       Star Trek Nemesis  2002\n",
       "9   17779         Star Trek V: The Final Frontier  1989\n",
       "10  17780  Star Trek VI: The Undiscovered Country  1991"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pyspark.sql.functions import *\n",
    "(movie.filter(lower(movie['title']).contains('star trek')).fillna(False)\n",
    "         .select('id', 'title', 'yr').toPandas())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. id for actor Glenn Close\n",
    "\n",
    "What **id** number does the actor 'Glenn Close' have?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>140</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    id\n",
       "0  140"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "actor.filter(actor['name']=='Glenn Close').select('id').toPandas()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. id for Casablanca\n",
    "\n",
    "What is the **id** of the film 'Casablanca'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>11768</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      id\n",
       "0  11768"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movie.filter(movie['title']=='Casablanca').select('id').toPandas()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Cast list for Casablanca\n",
    "\n",
    "Obtain the cast list for 'Casablanca'.\n",
    "\n",
    "> _what is a cast list?_  \n",
    "> The cast list is the names of the actors who were in the movie.\n",
    "\n",
    "Use **movieid=11768**, (or whatever value you got from the previous question)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Peter Lorre</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>John Qualen</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Madeleine LeBeau</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Jack Benny</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Dan Seymour</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Norma Varden</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Ingrid Bergman</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Conrad Veidt</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Leon Belasco</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Humphrey Bogart</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Sydney Greenstreet</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Leonid Kinskey</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Wolfgang Zilzer</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Claude Rains</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Curt Bois</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>Leo White</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Ludwig StÃ¶ssel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Marcel Dalio</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>Paul Henreid</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>Joy Page</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>S. Z. Sakall</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>Dooley Wilson</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>William Edmunds</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>Gregory Gaye</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>Torben Meyer</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>Georges Renavent</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>Jean Del Val</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>Louis V. Arco</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>Trude Berliner</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>Ilka GrÃ¼nig</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>Richard Ryen</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>Hans Twardowski</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  name\n",
       "0          Peter Lorre\n",
       "1          John Qualen\n",
       "2     Madeleine LeBeau\n",
       "3           Jack Benny\n",
       "4          Dan Seymour\n",
       "5         Norma Varden\n",
       "6       Ingrid Bergman\n",
       "7         Conrad Veidt\n",
       "8         Leon Belasco\n",
       "9      Humphrey Bogart\n",
       "10  Sydney Greenstreet\n",
       "11      Leonid Kinskey\n",
       "12     Wolfgang Zilzer\n",
       "13        Claude Rains\n",
       "14           Curt Bois\n",
       "15           Leo White\n",
       "16     Ludwig StÃ¶ssel\n",
       "17        Marcel Dalio\n",
       "18        Paul Henreid\n",
       "19            Joy Page\n",
       "20        S. Z. Sakall\n",
       "21       Dooley Wilson\n",
       "22     William Edmunds\n",
       "23        Gregory Gaye\n",
       "24        Torben Meyer\n",
       "25    Georges Renavent\n",
       "26        Jean Del Val\n",
       "27       Louis V. Arco\n",
       "28      Trude Berliner\n",
       "29        Ilka GrÃ¼nig\n",
       "30        Richard Ryen\n",
       "31     Hans Twardowski"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(casting.join(actor, casting['actorid']==actor['id'])\n",
    " .filter(\"movieid==11768\")\n",
    " .select('name').toPandas())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Alien cast list\n",
    "\n",
    "Obtain the cast list for the film 'Alien'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>John Hurt</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Sigourney Weaver</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Yaphet Kotto</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Harry Dean Stanton</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Ian Holm</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Tom Skerritt</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Veronica Cartwright</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  name\n",
       "0            John Hurt\n",
       "1     Sigourney Weaver\n",
       "2         Yaphet Kotto\n",
       "3   Harry Dean Stanton\n",
       "4             Ian Holm\n",
       "5         Tom Skerritt\n",
       "6  Veronica Cartwright"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = (movie.join(casting, movie['id']==casting['movieid'], how='right')\n",
    "    .join(actor, col('actorid')==actor['id'], how='left')\n",
    "    .drop(actor['id']))\n",
    "a.filter(\"title=='Alien'\").select('name').toPandas()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. Harrison Ford movies\n",
    "\n",
    "List the films in which 'Harrison Ford' has appeared"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A Hundred and One Nights</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Air Force One</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>American Graffiti</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Apocalypse Now</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Clear and Present Danger</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Cowboys &amp; Aliens</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Crossing Over</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Dead Heat on a Merry-Go-Round</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Extraordinary Measures</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Firewall</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Force 10 From Navarone</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Hanover Street</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Hawthorne of the U.S.A.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Hollywood Homicide</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Indiana Jones and the Kingdom of the Crystal S...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>Indiana Jones and the Last Crusade</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Indiana Jones and the Temple of Doom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Jimmy Hollywood</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>K-19: The Widowmaker</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>More American Graffiti</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>Morning Glory</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>Patriot Games</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>Presumed Innocent</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>Raiders of the Lost Ark</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>Random Hearts</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>Regarding Henry</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>Sabrina</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>Sally of the Sawdust</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>Shadows</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>Six Days Seven Nights</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>Smilin' Through</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>Star Wars Episode IV: A New Hope</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>Star Wars Episode V: The Empire Strikes Back</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>Star Wars Episode VI: Return of the Jedi</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>The Conversation</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>The Devil's Own</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>The Fugitive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>The Mosquito Coast</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>What Lies Beneath</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>Witness</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>Working Girl</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                title\n",
       "0                            A Hundred and One Nights\n",
       "1                                       Air Force One\n",
       "2                                   American Graffiti\n",
       "3                                      Apocalypse Now\n",
       "4                            Clear and Present Danger\n",
       "5                                    Cowboys & Aliens\n",
       "6                                       Crossing Over\n",
       "7                       Dead Heat on a Merry-Go-Round\n",
       "8                              Extraordinary Measures\n",
       "9                                            Firewall\n",
       "10                             Force 10 From Navarone\n",
       "11                                     Hanover Street\n",
       "12                            Hawthorne of the U.S.A.\n",
       "13                                 Hollywood Homicide\n",
       "14  Indiana Jones and the Kingdom of the Crystal S...\n",
       "15                 Indiana Jones and the Last Crusade\n",
       "16               Indiana Jones and the Temple of Doom\n",
       "17                                    Jimmy Hollywood\n",
       "18                               K-19: The Widowmaker\n",
       "19                             More American Graffiti\n",
       "20                                      Morning Glory\n",
       "21                                      Patriot Games\n",
       "22                                  Presumed Innocent\n",
       "23                            Raiders of the Lost Ark\n",
       "24                                      Random Hearts\n",
       "25                                    Regarding Henry\n",
       "26                                            Sabrina\n",
       "27                               Sally of the Sawdust\n",
       "28                                            Shadows\n",
       "29                              Six Days Seven Nights\n",
       "30                                    Smilin' Through\n",
       "31                   Star Wars Episode IV: A New Hope\n",
       "32       Star Wars Episode V: The Empire Strikes Back\n",
       "33           Star Wars Episode VI: Return of the Jedi\n",
       "34                                   The Conversation\n",
       "35                                    The Devil's Own\n",
       "36                                       The Fugitive\n",
       "37                                 The Mosquito Coast\n",
       "38                                               None\n",
       "39                                  What Lies Beneath\n",
       "40                                            Witness\n",
       "41                                       Working Girl"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# a was obtained in #7\n",
    "a.filter(a['name']=='Harrison Ford').select('title').toPandas()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 9. Harrison Ford as a supporting actor\n",
    "\n",
    "List the films where 'Harrison Ford' has appeared - but not in the starring role. [Note: the ord field of casting gives the position of the actor. If ord=1 then this actor is in the starring role]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A Hundred and One Nights</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>American Graffiti</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Apocalypse Now</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Cowboys &amp; Aliens</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Dead Heat on a Merry-Go-Round</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Extraordinary Measures</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Force 10 From Navarone</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Hawthorne of the U.S.A.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Jimmy Hollywood</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>More American Graffiti</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Morning Glory</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Sally of the Sawdust</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Shadows</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Smilin' Through</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Star Wars Episode IV: A New Hope</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>Star Wars Episode V: The Empire Strikes Back</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Star Wars Episode VI: Return of the Jedi</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>The Conversation</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>Working Girl</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                           title\n",
       "0                       A Hundred and One Nights\n",
       "1                              American Graffiti\n",
       "2                                 Apocalypse Now\n",
       "3                               Cowboys & Aliens\n",
       "4                  Dead Heat on a Merry-Go-Round\n",
       "5                         Extraordinary Measures\n",
       "6                         Force 10 From Navarone\n",
       "7                        Hawthorne of the U.S.A.\n",
       "8                                Jimmy Hollywood\n",
       "9                         More American Graffiti\n",
       "10                                 Morning Glory\n",
       "11                          Sally of the Sawdust\n",
       "12                                       Shadows\n",
       "13                               Smilin' Through\n",
       "14              Star Wars Episode IV: A New Hope\n",
       "15  Star Wars Episode V: The Empire Strikes Back\n",
       "16      Star Wars Episode VI: Return of the Jedi\n",
       "17                              The Conversation\n",
       "18                                  Working Girl"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# a was obtained in #7\n",
    "a.filter((a['name']=='Harrison Ford') & (a['ord']>1)).select('title').toPandas()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 10. Lead actors in 1962 movies\n",
    "\n",
    "List the films together with the leading star for all 1962 films."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A Kind of Loving</td>\n",
       "      <td>Alan Bates</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A Symposium on Popular Songs</td>\n",
       "      <td>Paul Frees</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A Very Private Affair (Vie PrivÃ©e)</td>\n",
       "      <td>Brigitte Bardot</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>An Autumn Afternoon</td>\n",
       "      <td>Chishu Ryu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Atraco a las tres</td>\n",
       "      <td>JosÃ© Luis LÃ³pez VÃ¡zquez</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79</th>\n",
       "      <td>Two Half Times in Hell</td>\n",
       "      <td>Imre Sinkovits</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80</th>\n",
       "      <td>Varan the Unbelievable</td>\n",
       "      <td>KÃ´zÃ´ Nomura</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>81</th>\n",
       "      <td>Village of Daughters</td>\n",
       "      <td>Eric Sykes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>82</th>\n",
       "      <td>What Ever Happened to Baby Jane?</td>\n",
       "      <td>Bette Davis</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>83</th>\n",
       "      <td>Who's Got the Action?</td>\n",
       "      <td>Dean Martin</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>84 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  title                        name\n",
       "0                      A Kind of Loving                  Alan Bates\n",
       "1          A Symposium on Popular Songs                  Paul Frees\n",
       "2   A Very Private Affair (Vie PrivÃ©e)             Brigitte Bardot\n",
       "3                   An Autumn Afternoon                  Chishu Ryu\n",
       "4                     Atraco a las tres  JosÃ© Luis LÃ³pez VÃ¡zquez\n",
       "..                                  ...                         ...\n",
       "79               Two Half Times in Hell              Imre Sinkovits\n",
       "80               Varan the Unbelievable               KÃ´zÃ´ Nomura\n",
       "81                 Village of Daughters                  Eric Sykes\n",
       "82     What Ever Happened to Baby Jane?                 Bette Davis\n",
       "83                Who's Got the Action?                 Dean Martin\n",
       "\n",
       "[84 rows x 2 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# a was obtained in #7\n",
    "a.filter((a['yr']==1962) & (a['ord']==1)).select('title', 'name').toPandas()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 11. Busy years for Rock Hudson\n",
    "\n",
    "Which were the busiest years for 'Rock Hudson', show the year and the number of movies he made each year for any year in which he made more than 2 movies."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>yr</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1961</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1953</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     yr  count\n",
       "0  1961      3\n",
       "1  1953      5"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# a was obtained in #7\n",
    "(a.filter(a['name']=='Rock Hudson')\n",
    "    .select('yr', 'title')\n",
    "    .groupBy('yr').count()\n",
    "    .filter(\"count>2\")\n",
    "    .toPandas())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 12. Lead actor in Julie Andrews movies\n",
    "\n",
    "List the film title and the leading actor for all of the films 'Julie Andrews' played in.\n",
    "\n",
    "> _Did you get \"Little Miss Marker twice\"?_   \n",
    "> Julie Andrews starred in the 1980 remake of Little Miss Marker and not the original(1934).\n",
    ">\n",
    "> Title is not a unique field, create a table of IDs in your subquery"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10</td>\n",
       "      <td>Dudley Moore</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Darling Lili</td>\n",
       "      <td>Julie Andrews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Despicable Me</td>\n",
       "      <td>Steve Carell</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Duet for One</td>\n",
       "      <td>Julie Andrews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Hawaii</td>\n",
       "      <td>Julie Andrews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Little Miss Marker</td>\n",
       "      <td>Walter Matthau</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Mary Poppins</td>\n",
       "      <td>Julie Andrews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Relative Values</td>\n",
       "      <td>Julie Andrews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Shrek the Third</td>\n",
       "      <td>Mike Myers</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Star!</td>\n",
       "      <td>Julie Andrews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>The Americanization of Emily</td>\n",
       "      <td>James Garner</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>The Pink Panther Strikes Again</td>\n",
       "      <td>Peter Sellers</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>The Princess Diaries</td>\n",
       "      <td>Anne Hathaway</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>The Princess Diaries 2: Royal Engagement</td>\n",
       "      <td>Anne Hathaway</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>None</td>\n",
       "      <td>Julie Andrews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>The Tamarind Seed</td>\n",
       "      <td>Julie Andrews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Thoroughly Modern Millie</td>\n",
       "      <td>Julie Andrews</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Tooth Fairy</td>\n",
       "      <td>Dwayne Johnson</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>Torn Curtain</td>\n",
       "      <td>Paul Newman</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>Victor Victoria</td>\n",
       "      <td>Julie Andrews</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                       title            name\n",
       "0                                         10    Dudley Moore\n",
       "1                               Darling Lili   Julie Andrews\n",
       "2                              Despicable Me    Steve Carell\n",
       "3                               Duet for One   Julie Andrews\n",
       "4                                     Hawaii   Julie Andrews\n",
       "5                         Little Miss Marker  Walter Matthau\n",
       "6                               Mary Poppins   Julie Andrews\n",
       "7                            Relative Values   Julie Andrews\n",
       "8                            Shrek the Third      Mike Myers\n",
       "9                                      Star!   Julie Andrews\n",
       "10              The Americanization of Emily    James Garner\n",
       "11            The Pink Panther Strikes Again   Peter Sellers\n",
       "12                      The Princess Diaries   Anne Hathaway\n",
       "13  The Princess Diaries 2: Royal Engagement   Anne Hathaway\n",
       "14                                      None   Julie Andrews\n",
       "15                         The Tamarind Seed   Julie Andrews\n",
       "16                  Thoroughly Modern Millie   Julie Andrews\n",
       "17                               Tooth Fairy  Dwayne Johnson\n",
       "18                              Torn Curtain     Paul Newman\n",
       "19                           Victor Victoria   Julie Andrews"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# a was obtained in #7\n",
    "b = [x.movieid for x in a.filter(a['name']=='Julie Andrews').select('movieid').collect()]\n",
    "a.filter((a['movieid'].isin(b)) & (a['ord']==1)).select('title', 'name').toPandas()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 13. Actors with 15 leading roles\n",
    "\n",
    "Obtain a list, in alphabetical order, of actors who've had at least 15 **starring** roles."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Adam Sandler</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Al Pacino</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Anthony Hopkins</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Antonio Banderas</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Arnold Schwarzenegger</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>95</th>\n",
       "      <td>Tommy Lee Jones</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96</th>\n",
       "      <td>Tyrone Power</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>Walter Matthau</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98</th>\n",
       "      <td>William Garwood</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99</th>\n",
       "      <td>William Holden</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>100 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                     name\n",
       "0            Adam Sandler\n",
       "1               Al Pacino\n",
       "2         Anthony Hopkins\n",
       "3        Antonio Banderas\n",
       "4   Arnold Schwarzenegger\n",
       "..                    ...\n",
       "95        Tommy Lee Jones\n",
       "96           Tyrone Power\n",
       "97         Walter Matthau\n",
       "98        William Garwood\n",
       "99         William Holden\n",
       "\n",
       "[100 rows x 1 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# a was obtained in #7\n",
    "(a.filter(a['ord']==1)\n",
    "    .groupBy('actorid', 'name')\n",
    "    .count()\n",
    "    .filter(\"count>=15\")\n",
    "    .select('name')\n",
    "     .filter(~isnull(col('name')))\n",
    "    .orderBy('name')\n",
    "    .toPandas())\n",
    "# a.filter(isnull(a.name)).toPandas()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 14.\n",
    "List the films released in the year 1978 ordered by the number of actors in the cast, then by title."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>The Bad News Bears Go to Japan</td>\n",
       "      <td>50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>The Swarm</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Grease</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>American Hot Wax</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>The Boys from Brazil</td>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99</th>\n",
       "      <td>Lies My Father Told Me</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100</th>\n",
       "      <td>Same Time, Next Year</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>101</th>\n",
       "      <td>Somebody Killed Her Husband</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>102</th>\n",
       "      <td>That's Carry On!</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>103</th>\n",
       "      <td>The 36th Chamber of Shaolin</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>104 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                              title  count\n",
       "0    The Bad News Bears Go to Japan     50\n",
       "1                         The Swarm     37\n",
       "2                            Grease     28\n",
       "3                  American Hot Wax     27\n",
       "4              The Boys from Brazil     26\n",
       "..                              ...    ...\n",
       "99           Lies My Father Told Me      2\n",
       "100            Same Time, Next Year      2\n",
       "101     Somebody Killed Her Husband      2\n",
       "102                That's Carry On!      2\n",
       "103     The 36th Chamber of Shaolin      2\n",
       "\n",
       "[104 rows x 2 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# a was obtained in #7\n",
    "(a.filter(a['yr']==1978)\n",
    "    .groupBy('title', 'movieid')\n",
    "    .count()\n",
    "    .select('title', 'count')\n",
    "    .orderBy(col('count').desc(), 'title')\n",
    "    .toPandas())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 15.\n",
    "\n",
    "List all the people who have worked with 'Art Garfunkel'."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Beverly Johnson</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Bill Paxton</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Breckin Meyer</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Bruce Jay Friedman</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Cecilie Thomsen</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Cindy Crawford</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Donald Trump</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Elio Fiorucci</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Ellen Albertini Dow</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Frederique van der Wal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Georgina Grenville</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Harris Yulin</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Heather Matarazzo</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Heidi Klum</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Julian Sands</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>Kurtwood Smith</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Lauren Hutton</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Lorna Luft</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>Mark Ruffalo</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>Michael York</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>Mike Myers</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>Neve Campbell</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>Peter Bogdanovich</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>Robert DoQui</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>Ron Jeremy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>Ryan Phillippe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>Salma Hayek</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>Sela Ward</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>Sherilyn Fenn</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>Sherry Stringfield</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>Sheryl Crow</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>Skipp Sudduth</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>Stars on 54</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>Thelma Houston</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>Valerie Perrine</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>Veronica Webb</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      name\n",
       "0          Beverly Johnson\n",
       "1              Bill Paxton\n",
       "2            Breckin Meyer\n",
       "3       Bruce Jay Friedman\n",
       "4          Cecilie Thomsen\n",
       "5           Cindy Crawford\n",
       "6             Donald Trump\n",
       "7            Elio Fiorucci\n",
       "8      Ellen Albertini Dow\n",
       "9   Frederique van der Wal\n",
       "10      Georgina Grenville\n",
       "11            Harris Yulin\n",
       "12       Heather Matarazzo\n",
       "13              Heidi Klum\n",
       "14            Julian Sands\n",
       "15          Kurtwood Smith\n",
       "16           Lauren Hutton\n",
       "17              Lorna Luft\n",
       "18            Mark Ruffalo\n",
       "19            Michael York\n",
       "20              Mike Myers\n",
       "21           Neve Campbell\n",
       "22       Peter Bogdanovich\n",
       "23            Robert DoQui\n",
       "24              Ron Jeremy\n",
       "25          Ryan Phillippe\n",
       "26             Salma Hayek\n",
       "27               Sela Ward\n",
       "28           Sherilyn Fenn\n",
       "29      Sherry Stringfield\n",
       "30             Sheryl Crow\n",
       "31           Skipp Sudduth\n",
       "32             Stars on 54\n",
       "33          Thelma Houston\n",
       "34         Valerie Perrine\n",
       "35           Veronica Webb"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# a was obtained in #7\n",
    "b = [x.movieid for x in a.filter(a['name']=='Art Garfunkel')\n",
    "     .select('movieid').collect()]\n",
    "(a.filter((a['movieid'].isin(b)) & (a['name'] != 'Art Garfunkel'))\n",
    " .select('name')\n",
    " .orderBy('name').toPandas())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "sc.stop()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
